Chapter 9 HMSC analysis

9.1 Load data

load("data/data.Rdata")

9.2 Compute variance partitioning

# Select modelchain of interest
load("hmsc_bookdown/fit_model1_250_10.Rdata")

# Compute variance partitioning
varpart=computeVariancePartitioning(m)

varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=c("Elevation","I(Elevation^2)","logseqdepth","Transect","Random: Sampling_point"))) %>%
   group_by(variable) %>%
   summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
   tt()
tinytable_f6g6t2o2gezjyxwbze9u
variable mean sd
Elevation 28.089624 8.112544
I(Elevation^2) 29.613909 10.988159
logseqdepth 6.752721 6.181533
Transect 8.901872 8.504833
Random: Sampling_point 26.641874 19.523045
# Basal tree
varpart_tree <- genome_tree %>%
        keep.tip(., tip=m$spNames)

#Varpart table
varpart_table <- varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=rev(c("Elevation","I(Elevation^2)","logseqdepth","Transect","Random: Sampling_point")))) %>%
   mutate(genome=factor(genome, levels=rev(varpart_tree$tip.label)))

#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% varpart_tree$tip.label) %>%
    arrange(match(genome, varpart_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)


colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% varpart_tree$tip.label) %>%
    arrange(match(genome, varpart_tree$tip.label)) %>%
     select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Basal ggtree
varpart_tree <- varpart_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
   scale_fill_manual(values=colors_alphabetic)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()

# Add variance stacked barplot
vertical_tree <-  varpart_tree +
        scale_fill_manual(values=c("#34738f","#cccccc","#ed8a45","#b2b530","#be3e2b","#83bb90","#f6de6c", "#122f3d"))+
        geom_fruit(
             data=varpart_table,
             geom=geom_bar,
             mapping = aes(x=value, y=genome, fill=variable, group=variable),
             pwidth = 2,
             offset = 0.05,
             width= 1,
             orientation="y",
             stat="identity",
             color = "white",
             size=0.1)+
      labs(fill="Variable")

vertical_tree

9.3 Model fit

MFCV <- evaluateModelFit(hM=m, predY=cv)

mean(MFCV$R2, na.rm = TRUE)
[1] 0.1166458
genome_fit <- tibble(genome=m$spNames, r2 = MFCV[[2]])

genome_counts_filt %>% 
mutate_if(is.numeric, ~ . / sum(.)) %>% 
left_join(genome_fit, by="genome") %>% 
filter(r2>0.5) %>% 
select(-c(genome,r2)) %>% 
colSums() %>%  
hist()

# Select desired support threshold
support=0.9
negsupport=1-support

# Basal tree
postestimates_tree <- genome_tree %>%
        keep.tip(., tip=m$spNames)

#plotBeta(hM=m, post=getPostEstimate(hM=m, parName="Beta"), param = "Support", plotTree = TRUE, covNamesNumbers=c(1,0))

# Posterior estimate table
post_beta <- getPostEstimate(hM=m, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(genome=factor(genome, levels=rev(postestimates_tree$tip.label))) %>%
    mutate(value = case_when(
          value >= support ~ "Positive",
          value <= negsupport ~ "Negative",
          TRUE ~ "Neutral")) %>%
    mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    rename(intercept=2,
             Elevation=3,
           Elevation_2=4,
           TransectAran=5,
           TransectSentein=6,
           TransectTourmalet=7,
             logseqdepth=8
           ) %>%
    select(genome,intercept,Elevation,Elevation_2,TransectAran,TransectSentein,TransectTourmalet,logseqdepth) %>%
    column_to_rownames(var="genome")

#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% postestimates_tree$tip.label) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)


colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% postestimates_tree$tip.label) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
     select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Basal ggtree
postestimates_tree <- postestimates_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
      scale_fill_manual(values=colors_alphabetic)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()

# Add posterior significant heatmap

postestimates_tree <- gheatmap(postestimates_tree, post_beta, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
        scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
        labs(fill="Trend")

postestimates_tree +
        vexpand(.25, 1) # expand top

#Compute the residual correlation matrix
OmegaCor = computeAssociations(m)

# Reference tree (for sorting genomes)
genome_tree_subset <- genome_tree %>%
        keep.tip(., tip=m$spNames)


#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
          + (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean

matrix <- toPlot %>%
      as.data.frame() %>%
      rownames_to_column(var="genome1") %>%
      pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
      mutate(genome1= factor(genome1, levels=genome_tree_subset$tip.label)) %>%
      mutate(genome2= factor(genome2, levels=genome_tree_subset$tip.label)) %>%
      ggplot(aes(x = genome1, y = genome2, fill = cor)) +
            geom_tile() +
            scale_fill_gradient2(low = "#be3e2b",
                       mid = "#f4f4f4",
                       high = "#b2b530")+
            theme_void()

vtree_top <- genome_tree_subset %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(layout = "rectangular") + 
  layout_dendrogram() +  # Ensure correct layout
  coord_flip()
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
vtree_left <- genome_tree_subset %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(.)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#create composite figure
grid.arrange(grobs = list(vtree_top,matrix,vtree_left),
             layout_matrix = rbind(c(4,1,1,1,1,1,1,1,1,1,1,1),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2),
                                   c(3,2,2,2,2,2,2,2,2,2,2,2)))

9.4 Phylogenetic signal

mpost <- convertToCodaObject(m)
quantile(unlist(mpost$Rho), probs = c(.05,.5,.95))
  5%  50%  95% 
0.99 1.00 1.00 

9.5 Elevation predictions

gradient = seq(940, 2350, by = 100)
gradientlength = length(gradient)

#Treatment-specific gradient predictions
pred_elevation <- constructGradient(m,
                      focalVariable = "Elevation",
                      non.focalVariables = list(logseqdepth=list(1)),
                      ngrid=gradientlength) %>%
                      predict(m, Gradient = ., expected = TRUE) %>%
                      do.call(rbind,.) %>%
                      as.data.frame() %>%
                      mutate(elevation=rep(gradient,1000)) %>%
                      pivot_longer(-c(elevation), names_to = "genome", values_to = "value")
# weights:  12 (6 variable)
initial  value 145.560908 
iter  10 value 137.337331
final  value 137.337291 
converged

9.5.1 Responses to elevation

# Select desired support threshold
support=0.9
negsupport=1-support

#Get phylum colors from the EHI standard
phylum_colors <- genome_metadata %>%
    left_join(read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv"), by=join_by(phylum == phylum)) %>%
    arrange(match(genome, genome_tree$tip.label)) %>%
    select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    #slice(2:5) %>%
    select(colors) %>%
    pull()

getPostEstimate(hM=m, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(trend = case_when(
          value >= support ~ "Positive",
          value <= negsupport ~ "Negative",
          TRUE ~ "Neutral")) %>%
    filter(variable=="Elevation") %>%
    select(genome,trend) %>%
    left_join(pred_elevation, by=join_by(genome==genome)) %>%
    group_by(genome, trend, elevation) %>%
    summarize(value = mean(value, na.rm = TRUE)) %>%
    left_join(genome_metadata, by=join_by(genome == genome)) %>%
    ggplot(aes(x=elevation, y=value, group=genome, color=phylum, linetype=trend)) +
        geom_line() +
        scale_linetype_manual(values=c("solid","dashed","solid")) +
        scale_color_manual(values=phylum_colors) +
        facet_grid(fct_rev(trend) ~ phylum) +
        labs(y="Genome abundance (log)",x="Elevation") +
        theme(legend.position = "none") +
        theme_minimal() +
         theme(legend.position = "none",
               axis.text.x = element_text(angle = 45, hjust = 0.8,),
               axis.line.x = element_line(size = 0.5, linetype = "solid", colour = "black"),
)

9.6 Functional predictions

9.6.1 Element level

elements_table <- genome_gifts_filt %>%
    to.elements(., GIFT_db) %>%
    as.data.frame()

community_elements <- pred_elevation %>%
  group_by(elevation, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "elevation") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(elements_table,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="elevation")
   })

calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

element_predictions <- map_dfc(community_elements, function(mat) {
      mat %>%
        column_to_rownames(var = "elevation") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_elements[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated
p0<-positive_filtered<-element_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) 

if (nrow(positive_filtered) > 0) {
  positive_filtered %>%
  tt() |>
      style_tt(
        i = which(element_predictions$positive_support < 0.9 & element_predictions$negative_support < 0.1),
        background = "#E5D5B1") |>
      style_tt(
        i = which(element_predictions$negative_support > 0.9 & element_predictions$positive_support < 0.1),
        background = "#B7BCCE")
}


#Negatively associated
p1<-negative_filtered<-element_predictions %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) 

if (nrow(negative_filtered) > 0) {
  negative_filtered %>%
  tt() |>
      style_tt(
        i = which(element_predictions$positive_support < 0.9 & element_predictions$negative_support < 0.1),
        background = "#E5D5B1") |>
      style_tt(
        i = which(element_predictions$negative_support > 0.9 & element_predictions$positive_support < 0.1),
        background = "#B7BCCE")
}
# Positively associated
p0 %>%
  tt()
tinytable_9bve24mt62c7mo2ipoho
trait mean p1 p9 positive_support negative_support
D0613 0.034264683 0.0113319085 0.060146258 0.971 0.029
B0106 0.017715307 0.0039875628 0.033388901 0.966 0.034
D0609 0.014654471 0.0028010131 0.027577745 0.962 0.038
D0205 0.008600377 0.0020467751 0.015207831 0.953 0.047
D0207 0.031552241 0.0069001701 0.067737031 0.949 0.051
B0802 0.037714918 0.0086104696 0.071624679 0.947 0.053
D0304 0.014938085 0.0035430830 0.027059303 0.945 0.055
B0214 0.019893750 0.0026561423 0.037824383 0.931 0.069
B0217 0.013929202 0.0015844388 0.028027942 0.925 0.075
D0817 0.004336948 0.0003403666 0.008888889 0.919 0.081
#Negatively associated
p1%>%
  tt()
tinytable_40w7stzrccpxd2efgjp3
trait mean p1 p9 positive_support negative_support
B0208 -0.0344477317 -0.0618098460 -1.039294e-02 0.016 0.984
S0104 -0.0209740208 -0.0337504690 -8.006082e-03 0.022 0.978
D0805 -0.0004581602 -0.0009344457 -6.018417e-05 0.035 0.965
B0710 -0.0061948836 -0.0108430396 -1.870104e-03 0.037 0.963
B0310 -0.0028164249 -0.0048520436 -8.904523e-04 0.043 0.957
B1029 -0.0003677198 -0.0007876956 -4.280235e-05 0.045 0.955
D0604 -0.0252076749 -0.0512963980 -5.101516e-03 0.050 0.950
D0903 -0.0047866601 -0.0089646580 -1.108197e-03 0.050 0.950
B0218 -0.0116625901 -0.0223161257 -2.061981e-03 0.053 0.947
B0703 -0.0192156802 -0.0373477215 -2.861509e-03 0.059 0.941
D0206 -0.0197688096 -0.0392643553 -3.454538e-03 0.061 0.939
D0701 -0.0051397234 -0.0101283806 -6.650890e-04 0.068 0.932
D0509 -0.0151983776 -0.0301644940 -2.157025e-03 0.070 0.930
D0103 -0.0058062776 -0.0111387262 -5.611137e-04 0.073 0.927
B0903 -0.0040583726 -0.0082299820 -3.182127e-04 0.078 0.922
B0711 -0.0116387135 -0.0239457783 -1.087371e-03 0.079 0.921
D0307 -0.0106690266 -0.0219998777 -7.160822e-04 0.083 0.917
B0702 -0.0164229499 -0.0340972191 -1.406339e-03 0.085 0.915
D0508 -0.0025923862 -0.0054092788 -1.884571e-04 0.089 0.911
D0908 -0.0351466114 -0.0789647021 -1.049356e-03 0.091 0.909
D0912 -0.0009892400 -0.0020007917 -1.425600e-05 0.092 0.908
B0303 -0.0072114512 -0.0152002291 -7.612867e-05 0.097 0.903
#Positively associated
positive <- element_predictions %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

#Negatively associated
negative <- element_predictions %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

all_elements <- bind_rows(positive,negative) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
  mutate(Code_function=factor(Code_function)) %>%
  mutate(element_legend=str_c(trait," - ",Element)) %>%
  mutate(function_legend=str_c(Code_function," - ",Function)) %>%
  select(trait,mean,p1,p9,element_legend,function_legend) %>% 
  unique()

gift_colors <- read_tsv("data/gift_colors.tsv") %>% 
  mutate(legend=str_c(Code_function," - ",Function))  %>% 
  filter(legend %in% all_elements$function_legend)

all_elements %>%
  ggplot(aes(x=mean, y=fct_reorder(element_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.15,0.15)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait")

community_elements %>%
    bind_rows() %>%
    pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
    filter(trait %in% positive$trait) %>%
    mutate(trait=factor(trait, levels=positive$trait)) %>%
    mutate(elevation=as.numeric(elevation)) %>%
    ggplot(aes(x=elevation, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Elevation",y="Metabolic Capacity Index")

9.6.1.1 GIFT test visualization

# Aggregate bundle-level GIFTs into the compound level
genome_counts_filt_filt <- tibble::rownames_to_column(genome_counts_filt_filt, var = "genome")
GIFTs_elements_filtered <- elements_table[rownames(elements_table) %in% genome_counts_filt_filt$genome, ]
GIFTs_elements_filtered <- as.data.frame(GIFTs_elements_filtered) %>%
  select_if(~ !is.numeric(.) || sum(.) != 0)

# Get community-weighed average GIFTs per sample
GIFTs_elements_community <- to.community(GIFTs_elements_filtered, genome_counts_filt_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

GIFTs_elements_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    mutate(functionid = substr(trait, 1, 3)) %>%
    mutate(trait = case_when(
      trait %in% GIFT_db$Code_element ~ GIFT_db$Element[match(trait, GIFT_db$Code_element)],
      TRUE ~ trait
    )) %>%
    mutate(functionid = case_when(
      functionid %in% GIFT_db$Code_function ~ GIFT_db$Function[match(functionid, GIFT_db$Code_function)],
      TRUE ~ functionid
    )) %>%
    mutate(trait=factor(trait,levels=unique(GIFT_db$Element))) %>%
    mutate(functionid=factor(functionid,levels=unique(GIFT_db$Function))) %>%
    ggplot(aes(x=sample,y=trait,fill=gift)) +
        geom_tile(colour="white", linewidth=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(functionid ~ Elevation, scales="free",space="free") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
              strip.text.y = element_text(angle = 0)) +
        labs(y="Traits",x="Samples",fill="GIFT")

9.6.2 Functional level

functions_table <- elements_table %>%
    to.functions(., GIFT_db) %>%
    as.data.frame()

community_functions <- pred_elevation %>%
  group_by(elevation, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "elevation") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(functions_table,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="elevation")
   })
#max-min option
calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

function_predictions <- map_dfc(community_functions, function(mat) {
      mat %>%
        column_to_rownames(var = "elevation") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_functions[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated
p2<-function_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  tt() |>
      style_tt(
        i = which(function_predictions$positive_support < 0.9 & function_predictions$negative_support < 0.1),
        background = "#E5D5B1") |>
      style_tt(
        i = which(function_predictions$negative_support > 0.9 & function_predictions$positive_support < 0.1),
        background = "#B7BCCE")

# Negatively associated (there isn't)
#function_predictions %>%
  #filter(mean <0) %>%
  #arrange(-negative_support) %>%
  #filter(negative_support>=0.9) %>%
  #tt() |>
      #style_tt(
        #i = which(function_predictions$positive_support < 0.9 & function_predictions$negative_support < 0.1),
        #background = "#E5D5B1") |>
      #style_tt(
        #i = which(function_predictions$negative_support > 0.9 & function_predictions$positive_support < 0.1),
        #background = "#B7BCCE")
# Positively associated
p2 %>%
  tt()
tinytable_ggzrtqacrzyht5mrhs2a
trait mean p1 p9 positive_support negative_support
S03 0.026906176 0.0104593848 0.044524683 0.973 0.027
B04 0.018264988 0.0051591436 0.033137776 0.970 0.030
D03 0.010497822 0.0021459296 0.019986199 0.943 0.057
D06 0.003285998 0.0004532722 0.006338923 0.933 0.067
B07 0.012228797 0.0004053295 0.024691062 0.906 0.094
#Negatively associated (there isn't)
all_functions <- function_predictions %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9,function_legend) %>% 
  unique()

gift_colors <- read_tsv("data/gift_colors.tsv") %>% 
  mutate(legend=str_c(Code_function," - ",Function))  %>% 
  filter(legend %in% all_functions$function_legend)

all_functions %>%
  ggplot(aes(x=mean, y=fct_reorder(function_legend, mean), xmin=p1, xmax=p9, color=function_legend)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.05,0.06)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = gift_colors$Color) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait") +
      guides(col = guide_legend(ncol = 1))

community_functions %>%
    bind_rows() %>%
    pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
  filter(trait %in% function_predictions$trait) %>%
  mutate(trait=factor(trait, levels=function_predictions$trait)) %>%
    mutate(elevation=as.numeric(elevation)) %>%
    ggplot(aes(x=elevation, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Elevation",y="Metabolic Capacity Index")

9.6.2.1 GIFT test visualization

# Aggregate element-level GIFTs into the function level
GIFTs_functions <- to.functions(GIFTs_elements_filtered, GIFT_db)
functions <- GIFTs_functions %>%
  as.data.frame()

# Get community-weighed average GIFTs per sample
GIFTs_functions_community <- to.community(GIFTs_functions, genome_counts_filt_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

GIFTs_functions_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    ggplot(aes(x=trait,y=sample,fill=gift)) +
        geom_tile(colour="white", size=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(Elevation ~ ., scales="free",space="free")

9.6.3 Domain level

domains_table <- functions_table %>%
    to.domains(., GIFT_db) %>%
    as.data.frame()

community_domains <- pred_elevation %>%
  group_by(elevation, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "elevation") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(domains_table,.,GIFT_db) %>%
    as.data.frame() %>%
    rownames_to_column(var="elevation")
   })
#max-min option
calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

domain_predictions <- map_dfc(community_domains, function(mat) {
      mat %>%
        column_to_rownames(var = "elevation") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_domains[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated
p3<-positive_filtered<-domain_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) 

if (nrow(positive_filtered) > 0) {
  positive_filtered %>%
  tt() |>
      style_tt(
        i = which(domain_predictions$positive_support < 0.9 & domain_predictions$negative_support < 0.1),
        background = "#E5D5B1") |>
      style_tt(
        i = which(domain_predictions$negative_support > 0.9 & domain_predictions$positive_support < 0.1),
        background = "#B7BCCE")
}

# Negatively associated (there isn't)
#domain_predictions %>%
  #filter(mean <0) %>%
  #arrange(-negative_support) %>%
  #filter(negative_support>=0.9) %>%
  #tt() |>
      #style_tt(
        #i = which(domain_predictions$positive_support < 0.9 & domain_predictions$negative_support < 0.1),
        #background = "#E5D5B1") |>
      #style_tt(
        #i = which(domain_predictions$negative_support > 0.9 & domain_predictions$positive_support < 0.1),
        #background = "#B7BCCE")
# Positively associated
p3 %>%
  tt()
tinytable_bofq74swojxdwka9wigj
trait mean p1 p9 positive_support negative_support
Overall 0.006863701 0.0003939157 0.01509651 0.917 0.083
# Negatively associated (there isn't)
all_domains <- domain_predictions %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait)) %>%
  mutate(function_legend=str_c(trait," - ",Function)) %>%
  select(trait,mean,p1,p9) %>% 
  unique()

all_domains %>%
  ggplot(aes(x=mean, y=fct_reorder(trait, mean), xmin=p1, xmax=p9, color=trait)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.02,0.03)) +
      geom_vline(xintercept=0) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Domain level") +
      guides(col = guide_legend(ncol = 1))

community_domains %>%
    bind_rows() %>%
    pivot_longer(-elevation, names_to = "trait", values_to = "value") %>%
  filter(trait %in% domain_predictions$trait) %>%
  mutate(trait=factor(trait, levels=domain_predictions$trait)) %>%
    mutate(elevation=as.numeric(elevation)) %>%
    ggplot(aes(x=elevation, y=value)) +
          geom_smooth(method = lm, formula = y ~ x, se = TRUE) +
          #geom_smooth(method = lm, formula = y ~ splines::bs(x, 3), se = TRUE) +
          facet_wrap(~trait, ncol=5, scales="free") +
          theme_minimal() +
          labs(x="Elevation",y="Metabolic Capacity Index")

9.6.3.1 GIFT test visualization

# Aggregate function-level GIFTs into overall Biosynthesis, Degradation and Structural GIFTs
GIFTs_domains <- to.domains(GIFTs_functions, GIFT_db)
domains <- GIFTs_domains %>%
  as.data.frame()

# Get community-weighed average GIFTs per sample
GIFTs_domains_community <- to.community(GIFTs_domains, genome_counts_filt_filt %>% column_to_rownames(., "genome") %>% tss(), GIFT_db)

GIFTs_domains_community %>%
    as.data.frame() %>%
    rownames_to_column(var="sample") %>%
    pivot_longer(!sample,names_to="trait",values_to="gift") %>%
    left_join(sample_metadata, by = join_by(sample == EHI_number)) %>%
    ggplot(aes(x=trait,y=sample,fill=gift)) +
        geom_tile(colour="white", size=0.2)+
        scale_fill_gradientn(colours=rev(c("#d53e4f", "#f46d43", "#fdae61", "#fee08b", "#e6f598", "#abdda4", "#ddf1da")))+
        facet_grid(Elevation ~ ., scales="free",space="free")